<!DOCTYPE HTML>
<!--
	Story by HTML5 UP
	html5up.net | @ajlkn
	Free for personal and commercial use under the CCA 3.0 license (html5up.net/license)

	Note: Only needed for demo purposes. Delete for production sites.
-->
<html>
<head>
    <title>ViP3D: End-to-end Visual Trajectory Prediction via 3D Agent Queries</title>
    <meta charset="utf-8"/>
    <meta name="viewport" content="width=device-width, initial-scale=1, user-scalable=no"/>
    <link rel="stylesheet" href="assets/css/main.css"/>
    <link rel="icon" href="images/logo_icon.png"/>
    <noscript>
        <link rel="stylesheet" href="assets/css/noscript.css"/>
    </noscript>
    <style>
        canvas {
            position: relative;
            top: -20px;
            left: 50%;
            transform: translate(-50%, 0%);
            background-color: white;
        }
    </style>
</head>
<body class="is-preload">

<!-- Wrapper -->
<section id="wrapper" class="divided">

    <!--    <div>-->
    <!--        <div class="canvas">-->
    <!--            <h1>canvas</h1>-->
    <!--        </div>-->
    <!--    </div>-->

    <!-- One -->
    <section
            class="banner style1 orient-left content-align-center image-position-center fullscreen onload-image-fade-in onload-content-fade-right">
        <div class="content">
            <!-- Title row: logo and paper title side by side. The fixed width and negative margins are
                 manual layout offsets. -->
            <div style="align-items: center; display: flex; flex-direction: row; width: 1200px; justify-content: flex-start; margin-left: -200px; margin-top: -100px">
                <div class="image" style="width: 200px; height: 250px; background-color: white; margin-top: -30px">
                    <!-- Decorative: the title next to it already names the project, so alt stays empty. -->
                    <img src="images/logo.png" alt=""/>
                </div>
                <h2>ViP3D: End-to-end Visual Trajectory Prediction <br> via 3D Agent Queries</h2>
            </div>
            <!-- Authors. Superscript numbers refer to the affiliation list below; * marks equal contribution. -->
            <h5>Junru Gu<sup>1</sup>*,
                Chenxu Hu<sup>1</sup>*,
                Tianyuan Zhang<sup>2,3</sup>,
                Xuanyao Chen<sup>2,4</sup>,
                <br/>
                Yilun Wang<sup>5</sup>,
                Yue Wang<sup>6</sup>,
                Hang Zhao<sup>1,2</sup>
            </h5>
            <!-- Affiliations. The author heading above is now closed explicitly instead of relying on the
                 parser's error recovery when a second heading start tag is met. -->
            <h5>
                <sup>1</sup> IIIS, Tsinghua University,
                <sup>2</sup> Shanghai Qi Zhi Institute,
                <br/>
                <sup>3</sup> CMU,
                <sup>4</sup> Fudan University,
                <sup>5</sup> Li Auto,
                <sup>6</sup> MIT
            </h5>
            <h6 style="margin-top: -20px"><small>* Equal contribution</small></h6>
            <h5>
                Conference on Computer Vision and Pattern Recognition (CVPR), 2023
            </h5>
            <!-- <p>* Denotes equal contribution</p> -->
            <!--            <h3>CVPR 2022</h3>-->
            <h5>ViP3D is the first vision-based approach to predict future trajectories of agents for autonomous
                driving, modeling agent-level detection, tracking and prediction.</h5>
            <!-- <p class="major">A (modular, highly tweakable) responsive one-page template designed by <a href="https://html5up.net">HTML5 UP</a> and released for free under the <a href="https://html5up.net/license">Creative Commons</a>.</p> -->
            <!-- Icon links. Each icon has a text label in span.label that gives the link its accessible name. -->
            <div class="inner">
                <ul class="icons">
                    <li><a href="https://github.com/Tsinghua-MARS-Lab/ViP3D" class="icon brands style2 fa-github"><span
                            class="label">Github</span></a>
                    </li>
                    <!-- Opens in a new tab; rel keeps the new page from getting a handle on this window. -->
                    <li><a href="https://arxiv.org/abs/2208.01582" target="_blank" rel="noopener noreferrer"
                           class="icon style2 fa-file-pdf"><span
                            class="label">Arxiv</span></a>
                    </li>
                </ul>
            </div>

<!--            <ul class="actions stacked">-->
<!--                <li><a href="#first" class="button big wide smooth-scroll-middle">Learn More</a></li>-->
<!--            </ul>-->
        </div>
        <!--        <div class="image" style="width: 600px; ">-->
        <!--            <img src="images/teaser.png" alt=""/>-->
        <!--        </div>-->
    </section>

    <section>
        <!-- Demo video. Sizing is done in CSS: the former height=auto attribute is not a valid
             dimension value, and width/height attributes are meant to hold pixel integers. -->
        <span class="video">
            <!-- muted is what lets browsers honour autoplay; playsinline keeps playback inline on iOS
                 instead of switching to fullscreen. -->
            <video class="html-video" controls autoplay muted loop playsinline
                   style="width: 100%; height: auto">
                <source src="videos/demo.mp4" type="video/mp4">
                <!-- Fallback content, shown only by browsers that cannot render the video element. -->
                <a href="videos/demo.mp4">Download the demo video (MP4)</a>
            </video>
        </span>

<!--        <a href="#btm" style="justify-content: center; display: flex">Demo</a>-->
    </section>

    <!-- Method -->
    <section class="wrapper style1 align-left">
        <div class="inner">
            <h2>Pipelines</h2>
            <p>The traditional pipeline involves multiple non-differentiable modules, e.g., detection, tracking, and
                prediction. ViP3D takes multi-view videos as input and generates predicted trajectories in an end-to-end
                manner, which can effectively leverage visual information such as turning signals of vehicles.
            </p>
            <!-- The figure carries information, so it needs a text alternative rather than an empty alt.
                 NOTE(review): this alt text is derived from the paragraph above; confirm it matches the image. -->
            <span class="image main"><img src="images/motivation.png"
                                          alt="Diagram comparing the traditional pipeline of separate detection, tracking and prediction modules with the end-to-end ViP3D pipeline"/></span>
        </div>
    </section>

    <section class="wrapper style1 align-left">
        <div class="inner">
            <h2>Related Projects</h2>
            <div class="col text-center">
                <!-- One-row table used purely to lay four project cards side by side. role="presentation"
                     tells assistive technology not to announce it as a data table. The width is set in CSS
                     because the width attribute on table is obsolete. -->
                <table role="presentation" style="width: 100%; margin: 0pt 0pt; text-align: center;">
                    <tbody>
                    <tr>
                        <!-- Each card: category caption, then a link holding a thumbnail and the project name.
                             The thumbnails have an empty alt because the project name inside the same link
                             already provides the link text. -->
                        <td>
                            BEV Vectorized Mapping<br>
                            <a href="https://tsinghua-mars-lab.github.io/vectormapnet/" class="d-inline-block p-3"><img
                                    height="100" src="images/VectorMapNet_thumbnail.png" alt=""
                                    style="border:1px solid"
                                    data-nothumb=""><br>VectorMapNet</a>
                        </td>

                        <td>
                            BEV Detection<br>
                            <a href="https://tsinghua-mars-lab.github.io/detr3d/" class="d-inline-block p-3"><img
                                    height="100" src="images/detr3d_thumbnail.png" alt=""
                                    style="border:1px solid"
                                    data-nothumb=""><br>DETR3D</a>
                        </td>

                        <td>
                            BEV Fusion<br>
                            <a href="https://tsinghua-mars-lab.github.io/futr3d/" class="d-inline-block p-3"><img
                                    height="100" src="images/futr3d_thumbnail.png" alt=""
                                    style="border:1px solid"
                                    data-nothumb=""><br>FUTR3D</a>
                        </td>

                        <td>
                            BEV Tracking<br>
                            <a href="https://tsinghua-mars-lab.github.io/mutr3d/" class="d-inline-block p-3"><img
                                    height="100" src="images/mutr3d_thumbnail.png" alt=""
                                    style="border:1px solid"
                                    data-nothumb=""><br>MUTR3D</a>
                        </td>

                    </tr>
                    </tbody>
                </table>
            </div>
        </div>
    </section>

    <!--    <section class="wrapper style1 align-left">-->
    <!--        <div class="title" id="lang">Related Projects on <a href="https://vcad-ai.github.io/">VCAD (Vision-Centric-->
    <!--            Autonomous Driving)</a></div>-->
    <!--    </section>-->

    <!-- Citation section: a lead-in sentence followed by a BibTeX entry for readers to copy.
         The entry's markup and text are kept exactly as written so that copied text does not change.
         NOTE(review): the entry cites the arXiv preprint (2022) while the banner lists CVPR 2023;
         confirm whether the citation should be updated. -->
    <section class="wrapper style1 align-left">
        <div class="inner">
            <h2>Citation</h2>
            <p>If you find our work intriguing, inspiring or useful to your research, please consider citing:</p>
            <!-- BibTeX entry; line breaks are produced by the br elements, not by the source newlines. -->
            <blockquote style="text-align:left; background-color:#EEEEEE">
                @article{vip3d,<br>
                title={ViP3D: End-to-end Visual Trajectory Prediction via 3D Agent Queries},<br>
                author={Gu, Junru and Hu, Chenxu and Zhang, Tianyuan and Chen, Xuanyao and Wang, Yilun and Wang, Yue and
                Zhao, Hang},<br>
                journal={arXiv preprint arXiv:2208.01582},<br>
                year={2022}<br>
                }
            </blockquote>
        </div>
    </section>


<!--     <section class="wrapper style1 align-left">-->
<!--        <div class="inner">-->
<!--            <h2>-->
<!--                Demo-->
<!--                <h1 style="position: absolute; left: 10px; ">-->
<!--                    <div id="btm" style="margin-top: 500px; opacity: 0%">b-->
<!--                    </div>-->
<!--                </h1>-->
<!--            </h2>-->

<!--        </div>-->
<!--    </section>-->


    </section><!-- Closes section#wrapper opened right after the body tag; it was previously paired with a stray div end tag. -->

    <!-- Scripts: classic scripts without async or defer, so they run in the order listed once the
         markup above has been parsed. jQuery is listed before the jquery.* plugins.
         NOTE(review): util.js and main.js presumably rely on the libraries above; keep this order. -->
    <script src="assets/js/jquery.min.js"></script>
    <script src="assets/js/jquery.scrollex.min.js"></script>
    <script src="assets/js/jquery.scrolly.min.js"></script>
    <script src="assets/js/browser.min.js"></script>
    <script src="assets/js/breakpoints.min.js"></script>
    <script src="assets/js/util.js"></script>
    <script src="assets/js/main.js"></script>

</body>
<!-- <script type="module">
  import init from './target/wasm_example.js'

  init()
</script> -->
</html>
